# coding:utf-8

from scrapy.contrib.spiders import CrawlSpider
from ..items import IefangItem
import scrapy
import bs4


class IefangSpider(CrawlSpider):
    """Spider that collects housing-estate listings from the iefang.com (爱易房) site.

    ``parse`` schedules one new-house search request per city taken from an
    embedded copy of the site's city table; ``parse_city_estate`` turns each
    result page into ``IefangItem`` objects and follows the pagination link.

    NOTE(review): ``parse`` is overridden on a ``CrawlSpider`` and no ``rules``
    are defined in this class, so the rule-based crawling of ``CrawlSpider`` is
    presumably not used here -- confirm before adding rules.
    """

    name = 'iefangspider'
    allowed_domains = ['iefang.com']
    start_urls = ['http://www.iefang.com/']

    # Keys placed in ``Request.meta`` by ``parse`` that describe the city being
    # crawled. Only these are forwarded to follow-up page requests.
    _CITY_META_KEYS = ('website', 'web_url', 'city', 'city_id', 'city_url')

    def parse(self, response):
        """Yield one new-house search request per city.

        The city list is read from the HTML snippet embedded below, not from
        ``response``; the response body is ignored.

        Each request carries the city context (``website``, ``web_url``,
        ``city``, ``city_id``, ``city_url``) in ``meta`` and is handled by
        ``parse_city_estate``.
        """
        data = ''' <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://www.iefang.com')" href="http://www.iefang.com">北京</a>
                            </td>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://dongguan.iefang.com')" href="http://dongguan.iefang.com">东莞</a>
                            </td>
                            <td class="ke_0c">
                                <a onclick="javascript:ChangeProvince('http://jinan.iefang.com')" href="http://jinan.iefang.com" >济南</a>
                            </td>
                            <td class="ke_0d">
                                <a onclick="javascript:ChangeProvince('http://qingdao.iefang.com')" href="http://qingdao.iefang.com" >青岛</a>
                            </td>
                            <td class="ke_0e">
                                <a onclick="javascript:ChangeProvince('http://tianjin.iefang.com')" href="http://tianjin.iefang.com" >天津</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://dalian.iefang.com')" href="http://dalian.iefang.com" >大连</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://chongqing.iefang.com')" href="http://chongqing.iefang.com" >重庆</a>
                            </td>
                            <td class="ke_0c">
                                <a onclick="javascript:ChangeProvince('http://kunming.iefang.com')" href="http://kunming.iefang.com" >昆明</a>
                            </td>
                            <td class="ke_0d">
                                <a onclick="javascript:ChangeProvince('http://shanghai.iefang.com')" href="http://shanghai.iefang.com" >上海</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://chengdu.iefang.com')" href="http://chengdu.iefang.com" >成都</a>
                            </td>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://guangzhou.iefang.com')" href="http://guangzhou.iefang.com" >广州</a>
                            </td>
                            <td class="ke_0d">
                                <a onclick="javascript:ChangeProvince('http://shenzhen.iefang.com')" href="http://shenzhen.iefang.com" >深圳</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://changsha.iefang.com')" href="http://changsha.iefang.com" >长沙</a>
                            </td>
                            <td class="ke_0c">
                                <a onclick="javascript:ChangeProvince('http://nanjing.iefang.com')" href="http://nanjing.iefang.com" >南京</a>
                            </td>
                            <td class="ke_0d">
                                <a onclick="javascript:ChangeProvince('http://suzhou.iefang.com')" href="http://suzhou.iefang.com" >苏州</a>
                            </td>
                            <td class="ke_0e">
                                <a onclick="javascript:ChangeProvince('http://wuhan.iefang.com')" href="http://wuhan.iefang.com" >武汉</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://changchun.iefang.com')" href="http://changchun.iefang.com" >长春</a>
                            </td>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://hangzhou.iefang.com')" href="http://hangzhou.iefang.com" >杭州</a>
                            </td>
                            <td class="ke_0c">
                                <a onclick="javascript:ChangeProvince('http://ningbo.iefang.com')" href="http://ningbo.iefang.com" >宁波</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0a">
                                <a onclick="javascript:ChangeProvince('http://changzhou.iefang.com')" href="http://changzhou.iefang.com" >常州</a>
                            </td>
                            <td class="ke_0d">
                                <a onclick="javascript:ChangeProvince('http://shenyang.iefang.com')" href="http://shenyang.iefang.com" >沈阳</a>
                            </td>

                        </tr>
                        <tr>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://hefei.iefang.com')" href="http://hefei.iefang.com" >合肥</a>
                            </td>
                            <td class="ke_0c">
                                <a onclick="javascript:ChangeProvince('http://nanchang.iefang.com')" href="http://nanchang.iefang.com" >南昌</a>
                            </td>
                            <td class="ke_0e">
                                <a onclick="javascript:ChangeProvince('http://xian.iefang.com')" href="http://xian.iefang.com" >西安</a>
                            </td>
                        </tr>
                        <tr>
                            <td class="ke_0b">
                                <a onclick="javascript:ChangeProvince('http://hainan.iefang.com')" href="http://hainan.iefang.com" >海南</a>
                            </td>
                            <td class="ke_0e">
                                <a onclick="javascript:ChangeProvince('http://xiamen.iefang.com')" href="http://xiamen.iefang.com" >厦门</a>
                            </td>
                        </tr>
'''
        soup = bs4.BeautifulSoup(data, 'lxml')

        # List of all cities: every anchor inside a table cell is one city.
        for child in soup.select('tr > td > a'):
            city_url = child.get('href')
            # The sub-domain doubles as the city identifier, e.g.
            # 'http://jinan.iefang.com' -> 'jinan' ('www' for the main site).
            city_id = city_url.replace('http://', '').replace('.iefang.com', '')
            site_url = city_url + '/NewHouse/NewHouse_Search/'
            citys = {
                'website': '爱易房', 'web_url': 'iefang.com',
                'city': child.get_text(), 'city_id': city_id, 'city_url': city_url
            }
            yield scrapy.Request(site_url, callback=self.parse_city_estate, meta=citys)

    def parse_city_estate(self, response):
        """Extract estate items from one city result page and follow pagination.

        Reads the city context from ``response.meta`` (``website``,
        ``web_url``, ``city``, ``city_id``), yields one ``IefangItem`` per
        ``.rui_1`` node that contains a link, and then yields a request for the
        next page when one is found. Nothing is yielded when the page has no
        ``.rui_1`` nodes.
        """
        meta = response.meta
        city_id = meta['city_id']
        base_url = 'http://' + str(city_id) + '.iefang.com'
        soup = bs4.BeautifulSoup(response.body, 'html.parser')

        # The site exposes no district information below the city level, so
        # estates are collected per city for now and 'area' is left empty.
        estatebox = soup.select('.rui_1')
        if not estatebox:
            return

        for child in estatebox:
            link = child.a
            href = link.get('href') if link is not None else None
            if not href:
                # Entry without a usable link: skip it instead of aborting the
                # whole page with an AttributeError/TypeError.
                continue

            item = IefangItem()
            item['website'] = meta['website']
            item['web_url'] = meta['web_url']
            item['city'] = meta['city']
            item['city_id'] = meta['city_id']
            item['area'] = ''
            item['estate'] = link.get_text()
            # Last path segment without its '.html' suffix is the estate id.
            item['estate_id'] = href.split('/')[-1].replace('.html', '')
            item['estate_url'] = base_url + href

            yield item

        # Pagination: the second-to-last anchor in the pager is used as the
        # "next page" link, so at least two anchors are required.
        pages = soup.select('.kan_77 > a')
        if len(pages) < 2:
            return

        next_url = pages[-2].get('href')
        if next_url:
            site_url = base_url + str(next_url)
            print('下一页', site_url)
            # Forward only the city context. Re-using response.meta wholesale
            # would also copy Scrapy's per-request bookkeeping (retry and
            # redirect counters, etc.) onto the new request.
            city_meta = {
                key: meta[key] for key in self._CITY_META_KEYS if key in meta
            }
            yield scrapy.Request(site_url, callback=self.parse_city_estate, meta=city_meta)
